# 1. Setup ####
# Load necessary library
library("tidyverse")
library("dplyr")
library("texreg") #regression tables
library("logistf") #rare event regression: implements Firth's penalized likelihood method for logistic regression
library("RColorBrewer") #ggplot colors
library("psych") #descriptive statistics

options(scipen = 99)
# Data
data<-read.csv("/Users/moon_p/Library/Mobile Documents/com~apple~CloudDocs/Research/04 Military experience project/II submission/Moon_232492609_replication data.csv")
h_data<-read.csv("/Users/moon_p/Library/Mobile Documents/com~apple~CloudDocs/Research/04 Military experience project/HorowitzStamLeadersIOMIDReplication.tab", sep="\t")
h_data<-h_data %>% 
  mutate(year=as.numeric(year)) %>% 
  select(ccode, year, cinc, warwin)
data<-left_join(data,h_data, by=c("ccode"="ccode", "inyear"="year"))
data<-na.omit(data)

variables_of_interest <- data[, c("Intervention", "milservice", "combat", "milnocombat", "gender", "age", "cinc", "polity", "ColdWar", "warwin")]
descriptive_stats <- describe(variables_of_interest)
print(descriptive_stats)

# 2. Logistic Regression #####
## 2.1 Baseline ####
# Fit the baseline models
log_reg_model1 <- glm(Intervention ~ milservice, data = data, family = binomial)
log_reg_model2 <- glm(Intervention ~ combat, data = data, family = binomial)
log_reg_model3 <- glm(Intervention ~ milnocombat, data = data, family = binomial)

summary(log_reg_model1)
summary(log_reg_model2)
summary(log_reg_model3)

# Create new datasets for prediction
new_data1_base <- data.frame(milservice = c(0, 1))
new_data2_base <- data.frame(combat = c(0, 1))
new_data3_base <- data.frame(milnocombat = c(0, 1))

# Predict probabilities
new_data1_base$predicted_prob <- predict(log_reg_model1, newdata = new_data1_base, type = "response")
new_data2_base$predicted_prob <- predict(log_reg_model2, newdata = new_data2_base, type = "response")
new_data3_base$predicted_prob <- predict(log_reg_model3, newdata = new_data3_base, type = "response")

# Add non-intervention probabilities
new_data1_base$non_intervention_prob <- 1 - new_data1_base$predicted_prob
new_data2_base$non_intervention_prob <- 1 - new_data2_base$predicted_prob
new_data3_base$non_intervention_prob <- 1 - new_data3_base$predicted_prob

# Add model and IV labels
new_data1_base$model <- "Military Service"
new_data1_base$IV_label <- factor(new_data1_base$milservice, labels = c("No Military Service", "Military Service"))

new_data2_base$model <- "Combat Experience"
new_data2_base$IV_label <- factor(new_data2_base$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_base$model <- "Military Service Without Combat Experience"
new_data3_base$IV_label <- factor(new_data3_base$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))


## Creating a plot for only Intervention (DV) ##
# Combine datasets
combined_data_base_single <- rbind(
  new_data1_base[, c("IV_label", "predicted_prob", "model")],
  new_data2_base[, c("IV_label", "predicted_prob", "model")],
  new_data3_base[, c("IV_label", "predicted_prob", "model")])

# Reshape data for plotting
library(tidyr)
plot_data_base_single <- pivot_longer(combined_data_base_single, cols = c(predicted_prob), 
                               names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_base_single$Outcome <- factor(plot_data_base_single$Outcome, levels = c("predicted_prob"), 
                                 labels = c("Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_base_single)

ggplot(plot_data_base_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (Baseline Models)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal()

## Creating a plot for both Intervention and Non-intervention (DV) ##
# Combine datasets
combined_data_base <- rbind(
  new_data1_base[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_base[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_base[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")])

# Reshape data for plotting
library(tidyr)
plot_data_base <- pivot_longer(combined_data_base, cols = c(predicted_prob, non_intervention_prob), 
                               names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_base$Outcome <- factor(plot_data_base$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                                 labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_base)

ggplot(plot_data_base, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (Baseline Models)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

htmlreg(list(log_reg_model1,
             log_reg_model2,
             log_reg_model3), file="moon_logistic_regression.html")


## 2.2 Controls ####
# Fit the models with control variables
log_reg_model1_c <- glm(Intervention ~ milservice + gender + age + polity + cinc + ColdWar + warwin, data = data, family = binomial)
log_reg_model2_c <- glm(Intervention ~ combat  + gender + age + polity + cinc + ColdWar + warwin, data = data, family = binomial)
log_reg_model3_c <- glm(Intervention ~ milnocombat  + gender + age + polity + cinc + ColdWar + warwin, data = data, family = binomial)
log_reg_model4_c <- glm(Intervention ~ combat + milnocombat  + gender + age + polity + cinc + ColdWar + warwin, data = data, family = binomial)

summary(log_reg_model1_c)
summary(log_reg_model2_c)
summary(log_reg_model3_c)
summary(log_reg_model4_c)

# Interaction terms
log_reg_model1_ci <- glm(Intervention ~ milservice + gender + age * polity + cinc + ColdWar + warwin, data = data, family = binomial)
log_reg_model2_ci <- glm(Intervention ~ combat  + gender + age * polity + cinc + ColdWar + warwin, data = data, family = binomial)
log_reg_model3_ci <- glm(Intervention ~ milnocombat  + gender + age * polity + cinc + ColdWar + warwin, data = data, family = binomial)
log_reg_model4_ci <- glm(Intervention ~ combat + milnocombat  + gender + age * polity + cinc + ColdWar + warwin, data = data, family = binomial)

summary(log_reg_model1_ci)
summary(log_reg_model2_ci)
summary(log_reg_model3_ci)
summary(log_reg_model4_ci)

# Testing for Model 4 coefficient differnece
#install.packages("car")
library(car)

linearHypothesis(log_reg_model4_c, "combat - milnocombat = 0")

# Create new datasets for prediction
mean_gender <- mean(data$gender, na.rm = TRUE)
mean_age <- mean(data$age, na.rm = TRUE)
mean_polity <- mean(data$polity, na.rm = TRUE)
mean_cinc <- mean(data$cinc, na.rm = TRUE)
mean_ColdWar <- mean(data$ColdWar, na.rm = TRUE)
mean_warwin <- mean(data$warwin, na.rm = TRUE)

new_data1_c <- data.frame(
  milservice = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

new_data2_c <- data.frame(
  combat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

new_data3_c <- data.frame(
  milnocombat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

# Predict probabilities
new_data1_c$predicted_prob <- predict(log_reg_model1_c, newdata = new_data1_c, type = "response")
new_data2_c$predicted_prob <- predict(log_reg_model2_c, newdata = new_data2_c, type = "response")
new_data3_c$predicted_prob <- predict(log_reg_model3_c, newdata = new_data3_c, type = "response")

# Add non-intervention probabilities
new_data1_c$non_intervention_prob <- 1 - new_data1_c$predicted_prob
new_data2_c$non_intervention_prob <- 1 - new_data2_c$predicted_prob
new_data3_c$non_intervention_prob <- 1 - new_data3_c$predicted_prob

# Add model and IV labels
new_data1_c$model <- "Military Service"
new_data1_c$IV_label <- factor(new_data1_c$milservice, labels = c("No Military Service", "Military Service"))

new_data2_c$model <- "Combat Experience"
new_data2_c$IV_label <- factor(new_data2_c$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_c$model <- "Military Service Without Combat Experience"
new_data3_c$IV_label <- factor(new_data3_c$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))

## Creating a plot for only Intervention (DV) ##
# Combine datasets
combined_data_c_single <- rbind(
  new_data1_c[, c("IV_label", "predicted_prob", "model")],
  new_data2_c[, c("IV_label", "predicted_prob", "model")],
  new_data3_c[, c("IV_label", "predicted_prob", "model")])

# Reshape data for plotting
plot_data_c_single <- pivot_longer(combined_data_c_single, cols = c(predicted_prob), 
                            names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_c_single$Outcome <- factor(plot_data_c_single$Outcome, levels = c("predicted_prob"), 
                              labels = c("Intervention"))

# Print predicted probabilities for models with controls
print(combined_data_c_single)

ggplot(plot_data_c_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (With Controls)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal()

## Creating a plot for both Intervention and Non-intervention (DV) ##
# Combine datasets
combined_data_c <- rbind(
  new_data1_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")])

# Reshape data for plotting
plot_data_c <- pivot_longer(combined_data_c, cols = c(predicted_prob, non_intervention_prob), 
                            names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_c$Outcome <- factor(plot_data_c$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                              labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for models with controls
print(combined_data_c)

ggplot(plot_data_c, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (With Controls)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

htmlreg(list(log_reg_model1_c,
             log_reg_model2_c,
             log_reg_model3_c,
             log_reg_model4_c), file="moon_logistic_regression_control.html")


# 3. Probit #####
## 3.1 Baseline ####
# Fit the baseline probit models
probit_model1 <- glm(Intervention ~ milservice, data = data, family = binomial(link = "probit"))
probit_model2 <- glm(Intervention ~ combat, data = data, family = binomial(link = "probit"))
probit_model3 <- glm(Intervention ~ milnocombat, data = data, family = binomial(link = "probit"))

summary(probit_model1)
summary(probit_model2)
summary(probit_model3)

# Create new datasets for prediction
new_data1_base_probit <- data.frame(milservice = c(0, 1))
new_data2_base_probit <- data.frame(combat = c(0, 1))
new_data3_base_probit <- data.frame(milnocombat = c(0, 1))

# Predict probabilities
new_data1_base_probit$predicted_prob <- predict(probit_model1, newdata = new_data1_base_probit, type = "response")
new_data2_base_probit$predicted_prob <- predict(probit_model2, newdata = new_data2_base_probit, type = "response")
new_data3_base_probit$predicted_prob <- predict(probit_model3, newdata = new_data3_base_probit, type = "response")

# Add non-intervention probabilities
new_data1_base_probit$non_intervention_prob <- 1 - new_data1_base_probit$predicted_prob
new_data2_base_probit$non_intervention_prob <- 1 - new_data2_base_probit$predicted_prob
new_data3_base_probit$non_intervention_prob <- 1 - new_data3_base_probit$predicted_prob

# Add model and IV labels
new_data1_base_probit$model <- "Military Service"
new_data1_base_probit$IV_label <- factor(new_data1_base_probit$milservice, labels = c("No Military Service", "Military Service"))

new_data2_base_probit$model <- "Combat Experience"
new_data2_base_probit$IV_label <- factor(new_data2_base_probit$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_base_probit$model <- "Military Service Without Combat Experience"
new_data3_base_probit$IV_label <- factor(new_data3_base_probit$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))

## Creating a plot with only Intervention (DV) ##
# Combine datasets
combined_data_base_probit_single <- rbind(
  new_data1_base_probit[, c("IV_label", "predicted_prob", "model")],
  new_data2_base_probit[, c("IV_label", "predicted_prob", "model")],
  new_data3_base_probit[, c("IV_label", "predicted_prob", "model")]
)

# Reshape data for plotting
library(tidyr)
plot_data_base_probit_single <- pivot_longer(combined_data_base_probit_single, cols = c(predicted_prob), 
                               names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_base_probit_single$Outcome <- factor(plot_data_base_probit_single$Outcome, levels = c("predicted_prob"), 
                                 labels = c("Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_base_probit_single)

ggplot(plot_data_base_probit_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (Probit Baseline Models)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal()

## Creating a plot with both Intervention and Non-intevention (DV) ##
# Combine datasets
combined_data_base_probit <- rbind(
  new_data1_base_probit[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_base_probit[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_base_probit[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")]
)

# Reshape data for plotting
library(tidyr)
plot_data_base_probit <- pivot_longer(combined_data_base_probit, cols = c(predicted_prob, non_intervention_prob), 
                               names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_base_probit$Outcome <- factor(plot_data_base_probit$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                                 labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_base_probit)

ggplot(plot_data_base_probit, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (Probit Baseline Models)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

## 3.2 Controls ####
# Fit the probit models with control variables
probit_model1_c <- glm(Intervention ~ milservice  + gender + age + polity + cinc + ColdWar + warwin, data = data, family = binomial(link = "probit"))
probit_model2_c <- glm(Intervention ~ combat  + gender + age + polity + cinc + ColdWar + warwin, data = data, family = binomial(link = "probit"))
probit_model3_c <- glm(Intervention ~ milnocombat  + gender + age + polity + cinc + ColdWar + warwin, data = data, family = binomial(link = "probit"))
probit_model4_c <- glm(Intervention ~ combat + milnocombat  + gender + age + polity + cinc + ColdWar + warwin, data = data, family = binomial(link = "probit"))

summary(probit_model1_c)
summary(probit_model2_c)
summary(probit_model3_c)
summary(probit_model4_c)

# Create new datasets for prediction
mean_gender <- mean(data$gender, na.rm = TRUE)
mean_age <- mean(data$age, na.rm = TRUE)
mean_polity <- mean(data$polity, na.rm = TRUE)
mean_cinc <- mean(data$cinc, na.rm = TRUE)
mean_ColdWar <- mean(data$ColdWar, na.rm = TRUE)
mean_warwin <- mean(data$warwin, na.rm = TRUE)

new_data1_c_probit <- data.frame(
  milservice = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

new_data2_c_probit <- data.frame(
  combat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

new_data3_c_probit <- data.frame(
  milnocombat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

# Predict probabilities
new_data1_c_probit$predicted_prob <- predict(probit_model1_c, newdata = new_data1_c, type = "response")
new_data2_c_probit$predicted_prob <- predict(probit_model2_c, newdata = new_data2_c, type = "response")
new_data3_c_probit$predicted_prob <- predict(probit_model3_c, newdata = new_data3_c, type = "response")

# Add non-intervention probabilities
new_data1_c_probit$non_intervention_prob <- 1 - new_data1_c_probit$predicted_prob
new_data2_c_probit$non_intervention_prob <- 1 - new_data2_c_probit$predicted_prob
new_data3_c_probit$non_intervention_prob <- 1 - new_data3_c_probit$predicted_prob

# Add model and IV labels
new_data1_c_probit$model <- "Military Service"
new_data1_c_probit$IV_label <- factor(new_data1_c_probit$milservice, labels = c("No Military Service", "Military Service"))

new_data2_c_probit$model <- "Combat Experience"
new_data2_c_probit$IV_label <- factor(new_data2_c_probit$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_c_probit$model <- "Military Service Without Combat Experience"
new_data3_c_probit$IV_label <- factor(new_data3_c_probit$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))

# Combine datasets
combined_data_c <- rbind(
  new_data1_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")]
)


htmlreg(list(probit_model1_c,
             probit_model2_c,
             probit_model3_c,
             probit_model4_c), file="moon_probit_regression_control.html")


# 4. Rare Event Logistic Regression ####
## 4.1 Baseline ####
# Fit the models without control variables
relogit_model1 <- logistf(Intervention ~ milservice, data = data)
relogit_model2 <- logistf(Intervention ~ combat, data = data)
relogit_model3 <- logistf(Intervention ~ milnocombat, data = data)

summary(relogit_model1)
summary(relogit_model2)
summary(relogit_model3)

# Create new datasets for prediction
new_data1_simple <- data.frame(
  milservice = c(0, 1)
)

new_data2_simple <- data.frame(
  combat = c(0, 1)
)

new_data3_simple <- data.frame(
  milnocombat = c(0, 1)
)


# Predict probabilities
new_data1_simple$predicted_prob <- predict(relogit_model1, newdata = new_data1_simple, type = "response")
new_data2_simple$predicted_prob <- predict(relogit_model2, newdata = new_data2_simple, type = "response")
new_data3_simple$predicted_prob <- predict(relogit_model3, newdata = new_data3_simple, type = "response")

# Add non-intervention probabilities
new_data1_simple$non_intervention_prob <- 1 - new_data1_simple$predicted_prob
new_data2_simple$non_intervention_prob <- 1 - new_data2_simple$predicted_prob
new_data3_simple$non_intervention_prob <- 1 - new_data3_simple$predicted_prob

# Add model and IV labels
new_data1_simple$model <- "Military Service"
new_data1_simple$IV_label <- factor(new_data1_simple$milservice, labels = c("No Military Service", "Military Service"))

new_data2_simple$model <- "Combat Experience"
new_data2_simple$IV_label <- factor(new_data2_simple$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_simple$model <- "Military Service Without Combat Experience"
new_data3_simple$IV_label <- factor(new_data3_simple$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))

## Creating a plot with only Intervention (DV) ##
# Combine datasets
combined_data_simple_single <- rbind(
  new_data1_simple[, c("IV_label", "predicted_prob", "model")],
  new_data2_simple[, c("IV_label", "predicted_prob", "model")],
  new_data3_simple[, c("IV_label", "predicted_prob", "model")]
)

# Reshape data for plotting
library(tidyr)
plot_data_simple_single <- pivot_longer(combined_data_simple_single, cols = c(predicted_prob), 
                                 names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_simple_single$Outcome <- factor(plot_data_simple_single$Outcome, levels = c("predicted_prob"), 
                                   labels = c("Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_simple_single)

ggplot(plot_data_simple_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (with rare event correction)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() 

## Creating a plot with both Intervention and Non-intervention (DV) ##
# Combine datasets
combined_data_simple <- rbind(
  new_data1_simple[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_simple[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_simple[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")]
)

# Reshape data for plotting
library(tidyr)
plot_data_simple <- pivot_longer(combined_data_simple, cols = c(predicted_prob, non_intervention_prob), 
                                 names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_simple$Outcome <- factor(plot_data_simple$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                                   labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_simple)

ggplot(plot_data_simple, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (with rare event correction)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

## 4.2 Controls ####
# Fit the models
relogit_model1_c <- logistf(Intervention ~ milservice + gender + age + polity + cinc + ColdWar + warwin, data = data)
relogit_model2_c <- logistf(Intervention ~ combat + gender + age + polity + cinc + ColdWar + warwin, data = data)
relogit_model3_c <- logistf(Intervention ~ milnocombat + gender + age + polity + cinc + ColdWar + warwin, data = data)

summary(relogit_model1_c)
summary(relogit_model2_c)
summary(relogit_model3_c)

# Interaction term
relogit_model1_ci <- logistf(Intervention ~ milservice + gender + age * polity + cinc + ColdWar + warwin, data = data)
relogit_model2_ci <- logistf(Intervention ~ combat + gender + age * polity + cinc + ColdWar + warwin, data = data)
relogit_model3_ci <- logistf(Intervention ~ milnocombat + gender + age * polity + cinc + ColdWar + warwin, data = data)

summary(relogit_model1_ci)
summary(relogit_model2_ci)
summary(relogit_model3_ci)


# Create new datasets for prediction
mean_gender <- mean(data$gender, na.rm = TRUE)
mean_age <- mean(data$age, na.rm = TRUE)
mean_polity <- mean(data$polity, na.rm = TRUE)
mean_cinc <- mean(data$cinc, na.rm = TRUE)
mean_ColdWar <- mean(data$ColdWar, na.rm = TRUE)
mean_warwin <- mean(data$warwin, na.rm = TRUE)

new_data1 <- data.frame(
  milservice = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

new_data2 <- data.frame(
  combat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

new_data3 <- data.frame(
  milnocombat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar,
  warwin = mean_warwin
)

# Predict probabilities
new_data1$predicted_prob <- predict(relogit_model1_c, newdata = new_data1, type = "response")
new_data2$predicted_prob <- predict(relogit_model2_c, newdata = new_data2, type = "response")
new_data3$predicted_prob <- predict(relogit_model3_c, newdata = new_data3, type = "response")

# Add non-intervention probabilities
new_data1$non_intervention_prob <- 1 - new_data1$predicted_prob
new_data2$non_intervention_prob <- 1 - new_data2$predicted_prob
new_data3$non_intervention_prob <- 1 - new_data3$predicted_prob

# Add model and IV labels
new_data1$model <- "Military Service"
new_data1$IV_label <- factor(new_data1$milservice, labels = c("No Military Service", "Military Service"))

new_data2$model <- "Combat Experience"
new_data2$IV_label <- factor(new_data2$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3$model <- "Military Service Without Combat Experience"
new_data3$IV_label <- factor(new_data3$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))


## Creating a plot for only Intervention (DV) ##
combined_data_single <- rbind(
  new_data1[, c("IV_label", "predicted_prob", "model")],
  new_data2[, c("IV_label", "predicted_prob", "model")],
  new_data3[, c("IV_label", "predicted_prob", "model")]
)

# Reshape data for plotting
plot_data_single <- pivot_longer(combined_data_single, cols = c(predicted_prob), 
                          names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_single$Outcome <- factor(plot_data_single$Outcome, levels = c("predicted_prob"), 
                            labels = c("Intervention"))

# Print predicted probabilities for models with controls
print(combined_data_single)

ggplot(plot_data_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (With Controls)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() 

## Creating a plot for both Intervention and Non-intervention (DV) ##
# Combine datasets
combined_data <- rbind(
  new_data1[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")]
)

# Reshape data for plotting
plot_data <- pivot_longer(combined_data, cols = c(predicted_prob, non_intervention_prob), 
                          names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data$Outcome <- factor(plot_data$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                            labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for models with controls
print(combined_data)

ggplot(plot_data, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (With Controls)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

## Creating a coefficient chart for relogit model
#install.packages("sjPlot")
#install.packages("broom")
library(sjPlot)
library(broom)
tab_model(relogit_model1_c, relogit_model2_c, relogit_model3_c, show.se = TRUE, show.p = TRUE, show.est = TRUE, 
          transform = NULL, file = "relogit_regression_results.html")
# Extract coefficients and compute odds ratios
coef1 <- exp(coef(relogit_model1_c))
coef2 <- exp(coef(relogit_model2_c))
coef3 <- exp(coef(relogit_model3_c))

# Extract standard errors for odds ratios (approximated via delta method)
se1 <- sqrt(diag(vcov(relogit_model1_c))) * coef1
se2 <- sqrt(diag(vcov(relogit_model2_c))) * coef2
se3 <- sqrt(diag(vcov(relogit_model3_c))) * coef3

# Read the HTML content
html_content <- readLines("relogit_regression_results.html")

# Create the odds ratios HTML string
odds_ratios_html <- paste0(
  "<h3>Odds Ratios and Standard Errors</h3>",
  "<table>",
  "<tr><th>Model</th><th>Variable</th><th>Odds Ratio</th><th>Standard Error</th></tr>",
  
  paste0(
    "<tr><td>Model 1</td><td>", names(coef1), "</td><td>", round(coef1, 3), "</td><td>", round(se1, 3), "</td></tr>",
    collapse = ""
  ),
  paste0(
    "<tr><td>Model 2</td><td>", names(coef2), "</td><td>", round(coef2, 3), "</td><td>", round(se2, 3), "</td></tr>",
    collapse = ""
  ),
  paste0(
    "<tr><td>Model 3</td><td>", names(coef3), "</td><td>", round(coef3, 3), "</td><td>", round(se3, 3), "</td></tr>",
    collapse = ""
  ),
  "</table>"
)

# Insert the odds ratios HTML before the closing </body> tag
html_content <- sub("</body>", paste0(odds_ratios_html, "</body>"), html_content)

# Write the updated HTML content back to the file
writeLines(html_content, "combined_regression_results.html")

### Model including both combat experience and no military experience (relogit) ##
# Adding variable: Civilian
data$civilian <- NA
data$civilian <- ifelse(data$milservice == 0, 1, 0)

# Fit the models
relogit_combinedmodel1_c <- logistf(Intervention ~ combat + civilian + gender + age + polity + cinc + ColdWar + warwin, data = data)
summary(relogit_combinedmodel1_c)

relogit_combinedmodel2_c <- logistf(Intervention ~ combat + milnocombat + gender + age + polity + cinc + ColdWar + warwin, data = data)
summary(relogit_combinedmodel2_c)

# Perform Breusch-Pagan test
#install.packages("lmtest")
library(lmtest)
bptest(relogit_model1_c)
bptest(relogit_model2_c)
bptest(relogit_model3_c)


##### Robustness check using only COW Intrastate war data set #####

# 1. Setup ####
# Load necessary library
library("tidyverse")
library("texreg") #regression tables
library("logistf") #rare event regression: implements Firth's penalized likelihood method for logistic regression
library("RColorBrewer") #ggplot colors
library("psych") #descriptive statistics

options(scipen = 99)
# Data
data<-read.csv("/Users/moon_p/Library/Mobile Documents/com~apple~CloudDocs/Research/04 Military experience project/Moon_intra_final 2.csv")
h_data<-read.csv("/Users/moon_p/Library/Mobile Documents/com~apple~CloudDocs/Research/04 Military experience project/HorowitzStamLeadersIOMIDReplication.tab", sep="\t")
h_data<-h_data %>% 
  mutate(year=as.numeric(year)) %>% 
  select(ccode, year, cinc, warwin)
data<-left_join(data,h_data, by=c("ccode"="ccode", "inyear"="year"))
data<-na.omit(data)

variables_of_interest <- data[, c("Intervention_Intra_only", "milservice", "combat", "milnocombat", "gender", "age", "cinc", "polity", "ColdWar", "warwin")]
descriptive_stats <- describe(variables_of_interest)
print(descriptive_stats)

# 2. Logistic Regression #####
## 2.1 Baseline ####
# Fit the baseline models
log_reg_model1 <- glm(Intervention_Intra_only ~ milservice, data = data, family = binomial)
log_reg_model2 <- glm(Intervention_Intra_only ~ combat, data = data, family = binomial)
log_reg_model3 <- glm(Intervention_Intra_only ~ milnocombat, data = data, family = binomial)

summary(log_reg_model1)
summary(log_reg_model2)
summary(log_reg_model3)

# Create new datasets for prediction
new_data1_base <- data.frame(milservice = c(0, 1))
new_data2_base <- data.frame(combat = c(0, 1))
new_data3_base <- data.frame(milnocombat = c(0, 1))

# Predict probabilities
new_data1_base$predicted_prob <- predict(log_reg_model1, newdata = new_data1_base, type = "response")
new_data2_base$predicted_prob <- predict(log_reg_model2, newdata = new_data2_base, type = "response")
new_data3_base$predicted_prob <- predict(log_reg_model3, newdata = new_data3_base, type = "response")

# Add non-intervention probabilities
new_data1_base$non_intervention_prob <- 1 - new_data1_base$predicted_prob
new_data2_base$non_intervention_prob <- 1 - new_data2_base$predicted_prob
new_data3_base$non_intervention_prob <- 1 - new_data3_base$predicted_prob

# Add model and IV labels
new_data1_base$model <- "Military Service"
new_data1_base$IV_label <- factor(new_data1_base$milservice, labels = c("No Military Service", "Military Service"))

new_data2_base$model <- "Combat Experience"
new_data2_base$IV_label <- factor(new_data2_base$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_base$model <- "Military Service Without Combat Experience"
new_data3_base$IV_label <- factor(new_data3_base$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))


## Creating a plot for only Intervention (DV) ##
# Combine datasets
combined_data_base_single <- rbind(
  new_data1_base[, c("IV_label", "predicted_prob", "model")],
  new_data2_base[, c("IV_label", "predicted_prob", "model")],
  new_data3_base[, c("IV_label", "predicted_prob", "model")])

# Reshape data for plotting
library(tidyr)
plot_data_base_single <- pivot_longer(combined_data_base_single, cols = c(predicted_prob), 
                                      names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_base_single$Outcome <- factor(plot_data_base_single$Outcome, levels = c("predicted_prob"), 
                                        labels = c("Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_base_single)

ggplot(plot_data_base_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (Baseline Models)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal()

## Creating a plot for both Intervention and Non-intervention (DV) ##
# Combine datasets
combined_data_base <- rbind(
  new_data1_base[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_base[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_base[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")])

# Reshape data for plotting
library(tidyr)
plot_data_base <- pivot_longer(combined_data_base, cols = c(predicted_prob, non_intervention_prob), 
                               names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_base$Outcome <- factor(plot_data_base$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                                 labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_base)

ggplot(plot_data_base, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (Baseline Models)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

htmlreg(list(log_reg_model1,
             log_reg_model2,
             log_reg_model3), file="moon_logistic_regression_robustcheck.html")


## 2.2 Controls ####
# Fit the models with control variables
log_reg_model1_c <- glm(Intervention_Intra_only ~ milservice  + age + polity + cinc + ColdWar + warwin, data = data, family = binomial)
log_reg_model2_c <- glm(Intervention_Intra_only ~ combat  + age + polity + cinc + ColdWar+ warwin, data = data, family = binomial)
log_reg_model3_c <- glm(Intervention_Intra_only ~ milnocombat  + age + polity + cinc + ColdWar+ warwin, data = data, family = binomial)

summary(log_reg_model1_c)
summary(log_reg_model2_c)
summary(log_reg_model3_c)

# Create new datasets for prediction
mean_gender <- mean(data$gender, na.rm = TRUE)
mean_age <- mean(data$age, na.rm = TRUE)
mean_polity <- mean(data$polity, na.rm = TRUE)
mean_cinc <- mean(data$cinc, na.rm = TRUE)
mean_ColdWar <- mean(data$ColdWar, na.rm = TRUE)

new_data1_c <- data.frame(
  milservice = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

new_data2_c <- data.frame(
  combat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

new_data3_c <- data.frame(
  milnocombat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

# Predict probabilities
new_data1_c$predicted_prob <- predict(log_reg_model1_c, newdata = new_data1_c, type = "response")
new_data2_c$predicted_prob <- predict(log_reg_model2_c, newdata = new_data2_c, type = "response")
new_data3_c$predicted_prob <- predict(log_reg_model3_c, newdata = new_data3_c, type = "response")

# Add non-intervention probabilities
new_data1_c$non_intervention_prob <- 1 - new_data1_c$predicted_prob
new_data2_c$non_intervention_prob <- 1 - new_data2_c$predicted_prob
new_data3_c$non_intervention_prob <- 1 - new_data3_c$predicted_prob

# Add model and IV labels
new_data1_c$model <- "Military Service"
new_data1_c$IV_label <- factor(new_data1_c$milservice, labels = c("No Military Service", "Military Service"))

new_data2_c$model <- "Combat Experience"
new_data2_c$IV_label <- factor(new_data2_c$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_c$model <- "Military Service Without Combat Experience"
new_data3_c$IV_label <- factor(new_data3_c$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))

## Creating a plot for only Intervention (DV) ##
# Combine datasets
combined_data_c_single <- rbind(
  new_data1_c[, c("IV_label", "predicted_prob", "model")],
  new_data2_c[, c("IV_label", "predicted_prob", "model")],
  new_data3_c[, c("IV_label", "predicted_prob", "model")])

# Reshape data for plotting
plot_data_c_single <- pivot_longer(combined_data_c_single, cols = c(predicted_prob), 
                                   names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_c_single$Outcome <- factor(plot_data_c_single$Outcome, levels = c("predicted_prob"), 
                                     labels = c("Intervention"))

# Print predicted probabilities for models with controls
print(combined_data_c_single)

ggplot(plot_data_c_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (With Controls)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal()

## Creating a plot for both Intervention and Non-intervention (DV) ##
# Combine datasets
combined_data_c <- rbind(
  new_data1_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")])

# Reshape data for plotting
plot_data_c <- pivot_longer(combined_data_c, cols = c(predicted_prob, non_intervention_prob), 
                            names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_c$Outcome <- factor(plot_data_c$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                              labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for models with controls
print(combined_data_c)

ggplot(plot_data_c, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (With Controls)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

htmlreg(list(log_reg_model1,
             log_reg_model2,
             log_reg_model3,
             log_reg_model1_c,
             log_reg_model2_c,
             log_reg_model3_c), file="moon_logistic_regression_control_robustcheck.html")


# 3. Probit #####
## 3.1 Baseline ####
# Fit the baseline probit models
probit_model1 <- glm(Intervention_Intra_only ~ milservice, data = data, family = binomial(link = "probit"))
probit_model2 <- glm(Intervention_Intra_only ~ combat, data = data, family = binomial(link = "probit"))
probit_model3 <- glm(Intervention_Intra_only ~ milnocombat, data = data, family = binomial(link = "probit"))

summary(probit_model1)
summary(probit_model2)
summary(probit_model3)

# Create new datasets for prediction
new_data1_base_probit <- data.frame(milservice = c(0, 1))
new_data2_base_probit <- data.frame(combat = c(0, 1))
new_data3_base_probit <- data.frame(milnocombat = c(0, 1))

# Predict probabilities
new_data1_base_probit$predicted_prob <- predict(probit_model1, newdata = new_data1_base_probit, type = "response")
new_data2_base_probit$predicted_prob <- predict(probit_model2, newdata = new_data2_base_probit, type = "response")
new_data3_base_probit$predicted_prob <- predict(probit_model3, newdata = new_data3_base_probit, type = "response")

# Add non-intervention probabilities
new_data1_base_probit$non_intervention_prob <- 1 - new_data1_base_probit$predicted_prob
new_data2_base_probit$non_intervention_prob <- 1 - new_data2_base_probit$predicted_prob
new_data3_base_probit$non_intervention_prob <- 1 - new_data3_base_probit$predicted_prob

# Add model and IV labels
new_data1_base_probit$model <- "Military Service"
new_data1_base_probit$IV_label <- factor(new_data1_base_probit$milservice, labels = c("No Military Service", "Military Service"))

new_data2_base_probit$model <- "Combat Experience"
new_data2_base_probit$IV_label <- factor(new_data2_base_probit$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_base_probit$model <- "Military Service Without Combat Experience"
new_data3_base_probit$IV_label <- factor(new_data3_base_probit$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))

## Creating a plot with only Intervention (DV) ##
# Combine datasets
combined_data_base_probit_single <- rbind(
  new_data1_base_probit[, c("IV_label", "predicted_prob", "model")],
  new_data2_base_probit[, c("IV_label", "predicted_prob", "model")],
  new_data3_base_probit[, c("IV_label", "predicted_prob", "model")]
)

# Reshape data for plotting
library(tidyr)
plot_data_base_probit_single <- pivot_longer(combined_data_base_probit_single, cols = c(predicted_prob), 
                                             names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_base_probit_single$Outcome <- factor(plot_data_base_probit_single$Outcome, levels = c("predicted_prob"), 
                                               labels = c("Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_base_probit_single)

ggplot(plot_data_base_probit_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (Probit Baseline Models)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal()

## Creating a plot with both Intervention and Non-intevention (DV) ##
# Combine datasets
combined_data_base_probit <- rbind(
  new_data1_base_probit[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_base_probit[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_base_probit[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")]
)

# Reshape data for plotting
library(tidyr)
plot_data_base_probit <- pivot_longer(combined_data_base_probit, cols = c(predicted_prob, non_intervention_prob), 
                                      names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_base_probit$Outcome <- factor(plot_data_base_probit$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                                        labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_base_probit)

ggplot(plot_data_base_probit, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (Probit Baseline Models)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

## 3.2 Controls ####
# Fit the probit models with control variables
probit_model1_c <- glm(Intervention_Intra_only ~ milservice  + age + polity + cinc + ColdWar, data = data, family = binomial(link = "probit"))
probit_model2_c <- glm(Intervention_Intra_only ~ combat  + age + polity + cinc + ColdWar, data = data, family = binomial(link = "probit"))
probit_model3_c <- glm(Intervention_Intra_only ~ milnocombat  + age + polity + cinc + ColdWar, data = data, family = binomial(link = "probit"))

summary(probit_model1_c)
summary(probit_model2_c)
summary(probit_model3_c)

# Create new datasets for prediction
mean_gender <- mean(data$gender, na.rm = TRUE)
mean_age <- mean(data$age, na.rm = TRUE)
mean_polity <- mean(data$polity, na.rm = TRUE)
mean_cinc <- mean(data$cinc, na.rm = TRUE)
mean_ColdWar <- mean(data$ColdWar, na.rm = TRUE)

new_data1_c_probit <- data.frame(
  milservice = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

new_data2_c_probit <- data.frame(
  combat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

new_data3_c_probit <- data.frame(
  milnocombat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

# Predict probabilities
new_data1_c_probit$predicted_prob <- predict(probit_model1_c, newdata = new_data1_c, type = "response")
new_data2_c_probit$predicted_prob <- predict(probit_model2_c, newdata = new_data2_c, type = "response")
new_data3_c_probit$predicted_prob <- predict(probit_model3_c, newdata = new_data3_c, type = "response")

# Add non-intervention probabilities
new_data1_c_probit$non_intervention_prob <- 1 - new_data1_c_probit$predicted_prob
new_data2_c_probit$non_intervention_prob <- 1 - new_data2_c_probit$predicted_prob
new_data3_c_probit$non_intervention_prob <- 1 - new_data3_c_probit$predicted_prob

# Add model and IV labels
new_data1_c_probit$model <- "Military Service"
new_data1_c_probit$IV_label <- factor(new_data1_c_probit$milservice, labels = c("No Military Service", "Military Service"))

new_data2_c_probit$model <- "Combat Experience"
new_data2_c_probit$IV_label <- factor(new_data2_c_probit$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_c_probit$model <- "Military Service Without Combat Experience"
new_data3_c_probit$IV_label <- factor(new_data3_c_probit$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))

# Combine datasets
combined_data_c <- rbind(
  new_data1_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_c[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")]
)


htmlreg(list(probit_model1,
             probit_model2,
             probit_model3,
             probit_model1_c,
             probit_model2_c,
             probit_model3_c), file="moon_probit_regression_control_robustcheck.html")


# 4. Rare Event Logistic Regression ####
## 4.1 Baseline ####
# Fit the models without control variables
relogit_model1 <- logistf(Intervention_Intra_only ~ milservice, data = data)
relogit_model2 <- logistf(Intervention_Intra_only ~ combat, data = data)
relogit_model3 <- logistf(Intervention_Intra_only ~ milnocombat, data = data)

summary(relogit_model1)
summary(relogit_model2)
summary(relogit_model3)

# Create new datasets for prediction
new_data1_simple <- data.frame(
  milservice = c(0, 1)
)

new_data2_simple <- data.frame(
  combat = c(0, 1)
)

new_data3_simple <- data.frame(
  milnocombat = c(0, 1)
)


# Predict probabilities
new_data1_simple$predicted_prob <- predict(relogit_model1, newdata = new_data1_simple, type = "response")
new_data2_simple$predicted_prob <- predict(relogit_model2, newdata = new_data2_simple, type = "response")
new_data3_simple$predicted_prob <- predict(relogit_model3, newdata = new_data3_simple, type = "response")

# Add non-intervention probabilities
new_data1_simple$non_intervention_prob <- 1 - new_data1_simple$predicted_prob
new_data2_simple$non_intervention_prob <- 1 - new_data2_simple$predicted_prob
new_data3_simple$non_intervention_prob <- 1 - new_data3_simple$predicted_prob

# Add model and IV labels
new_data1_simple$model <- "Military Service"
new_data1_simple$IV_label <- factor(new_data1_simple$milservice, labels = c("No Military Service", "Military Service"))

new_data2_simple$model <- "Combat Experience"
new_data2_simple$IV_label <- factor(new_data2_simple$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3_simple$model <- "Military Service Without Combat Experience"
new_data3_simple$IV_label <- factor(new_data3_simple$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))

## Creating a plot with only Intervention (DV) ##
# Combine datasets
combined_data_simple_single <- rbind(
  new_data1_simple[, c("IV_label", "predicted_prob", "model")],
  new_data2_simple[, c("IV_label", "predicted_prob", "model")],
  new_data3_simple[, c("IV_label", "predicted_prob", "model")]
)

# Reshape data for plotting
library(tidyr)
plot_data_simple_single <- pivot_longer(combined_data_simple_single, cols = c(predicted_prob), 
                                        names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_simple_single$Outcome <- factor(plot_data_simple_single$Outcome, levels = c("predicted_prob"), 
                                          labels = c("Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_simple_single)

ggplot(plot_data_simple_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (with rare event correction)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() 

## Creating a plot with both Intervention and Non-intervention (DV) ##
# Combine datasets
combined_data_simple <- rbind(
  new_data1_simple[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2_simple[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3_simple[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")]
)

# Reshape data for plotting
library(tidyr)
plot_data_simple <- pivot_longer(combined_data_simple, cols = c(predicted_prob, non_intervention_prob), 
                                 names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_simple$Outcome <- factor(plot_data_simple$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                                   labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for baseline models
print(combined_data_simple)

ggplot(plot_data_simple, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (with rare event correction)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")

## 4.2 Controls ####
# Fit the models
relogit_model1_c <- logistf(Intervention_Intra_only ~ milservice + gender + age + polity + cinc + ColdWar + warwin, data = data)
relogit_model2_c <- logistf(Intervention_Intra_only ~ combat + gender + age + polity + cinc + ColdWar+ warwin, data = data)
relogit_model3_c <- logistf(Intervention_Intra_only ~ milnocombat + gender + age + polity + cinc + ColdWar+ warwin, data = data)
relogit_model4_c <- logistf(Intervention_Intra_only ~ combat + milnocombat + gender + age + polity + cinc + ColdWar+ warwin, data = data)

summary(relogit_model1_c)
summary(relogit_model2_c)
summary(relogit_model3_c)
summary(relogit_model4_c)

# Create new datasets for prediction
mean_gender <- mean(data$gender, na.rm = TRUE)
mean_age <- mean(data$age, na.rm = TRUE)
mean_polity <- mean(data$polity, na.rm = TRUE)
mean_cinc <- mean(data$cinc, na.rm = TRUE)
mean_ColdWar <- mean(data$ColdWar, na.rm = TRUE)

new_data1 <- data.frame(
  milservice = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

new_data2 <- data.frame(
  combat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

new_data3 <- data.frame(
  milnocombat = c(0, 1),
  gender = mean_gender,
  age = mean_age,
  polity = mean_polity,
  cinc = mean_cinc,
  ColdWar = mean_ColdWar
)

# Predict probabilities
new_data1$predicted_prob <- predict(relogit_model1_c, newdata = new_data1, type = "response")
new_data2$predicted_prob <- predict(relogit_model2_c, newdata = new_data2, type = "response")
new_data3$predicted_prob <- predict(relogit_model3_c, newdata = new_data3, type = "response")

# Add non-intervention probabilities
new_data1$non_intervention_prob <- 1 - new_data1$predicted_prob
new_data2$non_intervention_prob <- 1 - new_data2$predicted_prob
new_data3$non_intervention_prob <- 1 - new_data3$predicted_prob

# Add model and IV labels
new_data1$model <- "Military Service"
new_data1$IV_label <- factor(new_data1$milservice, labels = c("No Military Service", "Military Service"))

new_data2$model <- "Combat Experience"
new_data2$IV_label <- factor(new_data2$combat, labels = c("No Combat Experience", "Combat Experience"))

new_data3$model <- "Military Service Without Combat Experience"
new_data3$IV_label <- factor(new_data3$milnocombat, labels = c("No Military Service Without Combat", "Military Service Without Combat"))


## Creating a plot for only Intervention (DV) ##
combined_data_single <- rbind(
  new_data1[, c("IV_label", "predicted_prob", "model")],
  new_data2[, c("IV_label", "predicted_prob", "model")],
  new_data3[, c("IV_label", "predicted_prob", "model")]
)

# Reshape data for plotting
plot_data_single <- pivot_longer(combined_data_single, cols = c(predicted_prob), 
                                 names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data_single$Outcome <- factor(plot_data_single$Outcome, levels = c("predicted_prob"), 
                                   labels = c("Intervention"))

# Print predicted probabilities for models with controls
print(combined_data_single)

ggplot(plot_data_single, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention")) +
  labs(title = "Predicted Probabilities of Intervention by Leader Background (With Controls)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() 

## Creating a plot for both Intervention and Non-intervention (DV) ##
# Combine datasets
combined_data <- rbind(
  new_data1[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data2[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")],
  new_data3[, c("IV_label", "predicted_prob", "non_intervention_prob", "model")]
)

# Reshape data for plotting
plot_data <- pivot_longer(combined_data, cols = c(predicted_prob, non_intervention_prob), 
                          names_to = "Outcome", values_to = "Probability")

# Rename levels for better readability
plot_data$Outcome <- factor(plot_data$Outcome, levels = c("predicted_prob", "non_intervention_prob"), 
                            labels = c("Intervention", "Non-Intervention"))

# Print predicted probabilities for models with controls
print(combined_data)

ggplot(plot_data, aes(x = IV_label, y = Probability, fill = Outcome)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("skyblue", "orange"), labels = c("Intervention", "Non-Intervention")) +
  labs(title = "Predicted Probabilities of Intervention and Non-Intervention by Leader Background (With Controls)",
       x = "Leader Background",
       y = "Predicted Probability") +
  facet_wrap(~ model, scales = "free_x") +
  theme_minimal() +
  theme(legend.position = "bottom")
